In [1]:
import pickle
# Fill this in based on where you saved the training and testing data
training_file = "traffic-signs-data/train.p"
validation_file= "traffic-signs-data/valid.p"
testing_file = "traffic-signs-data/test.p"
with open(training_file, mode='rb') as f:
train = pickle.load(f)
with open(validation_file, mode='rb') as f:
valid = pickle.load(f)
with open(testing_file, mode='rb') as f:
test = pickle.load(f)
X_train, y_train = train['features'], train['labels']
X_valid, y_valid = valid['features'], valid['labels']
X_test, y_test = test['features'], test['labels']
The pickled data is a dictionary with 4 key/value pairs:
'features' is a 4D array containing raw pixel data of the traffic sign images, (num examples, width, height, channels).
'labels' is a 1D array containing the label/class id of the traffic sign. The file signnames.csv contains id -> name mappings for each id.
'sizes' is a list containing tuples, (width, height), representing the original width and height of each image.
'coords' is a list containing tuples, (x1, y1, x2, y2), representing the coordinates of a bounding box around the sign in the image. These coordinates assume the original image; the pickled data contains resized (32 by 32) versions of these images.
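As a quick sanity check (a throwaway sketch, not part of the project code), the loaded dictionaries can be inspected directly:
print(train.keys())             # the four keys described above
print(train['features'].shape)  # (num examples, 32, 32, 3)
print(train['labels'].shape)    # (num examples,)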
In [2]:
def getDatasetInfo():
    # Number of training examples
    n_train = len(X_train)
    # Number of validation examples
    n_validation = len(X_valid)
    # Number of testing examples
    n_test = len(X_test)
    # Shape of a traffic sign image (all images share the same shape)
    image_shape = X_train[0].shape
    # Number of unique classes/labels in the dataset
    n_classes = len(set(y_train))
    print("Summary statistics of the traffic signs dataset")
    print()
    print("Number of training examples =", n_train)
    print("Number of testing examples =", n_test)
    print("Number of validation examples =", n_validation)
    print("Image data shape =", image_shape)
    print("Number of classes =", n_classes)

getDatasetInfo()
Loading a random color image from the training set along with its class label
In [3]:
### Data exploration visualization code goes here.
### Feel free to use as many code cells as needed.
import matplotlib.pyplot as plt
# Visualizations will be shown in the notebook.
%matplotlib inline
import random
import numpy as np

# random.randint is inclusive on both ends, so the upper bound must be len(X_train) - 1
randomIndex = random.randint(0, len(X_train) - 1)
image = X_train[randomIndex].squeeze()
#plt.figure(figsize=(2,2))
plt.imshow(image)
print(image.shape)
print(y_train[randomIndex])
Design and implement a deep learning model that learns to recognize traffic signs. Train and test your model on the German Traffic Sign Dataset.
The LeNet-5 implementation shown in the classroom at the end of the CNN lesson is a solid starting point. You'll have to change the number of classes and possibly the preprocessing, but aside from that it's plug and play!
With the LeNet-5 solution from the lecture, you should expect a validation set accuracy of about 0.89. To meet specifications, the validation set accuracy will need to be at least 0.93. It is possible to get an even higher accuracy, but 0.93 is the minimum for a successful project submission.
There are various aspects to consider when thinking about this problem:
Here is an example of a published baseline model on this problem. You're not required to be familiar with the approach used in the paper, but it's good practice to read papers like this one.
Minimally, the image data should be normalized so that it has zero mean and equal variance. For image data, (pixel - 128) / 128 is a quick way to approximately normalize the data and can be used in this project.
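As a quick check of that formula (assuming 8-bit pixel values in [0, 255]), it maps pixels into [-1, 1) with mean near zero:
import numpy as np
pixels = np.array([0, 64, 128, 255], dtype=np.float32)
print((pixels - 128) / 128)  # [-1.  -0.5  0.  0.9921875]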
Other pre-processing steps are optional. You can try different techniques to see if it improves performance.
Use the code cell (or multiple code cells, if necessary) to implement the first step of your project.
In [4]:
# Implement Min-Max scaling for grayscale image data
def normalize_grayscale(image_data):
    """
    Normalize the image data with Min-Max scaling to a range of [0.1, 0.9]
    :param image_data: The image data to be normalized
    :return: Normalized image data
    """
    a = 0.1
    b = 0.9
    grayscale_min = 0
    grayscale_max = 255
    normalized_grayscale_image = a + (((image_data - grayscale_min) * (b - a)) / (grayscale_max - grayscale_min))
    return normalized_grayscale_image

def normalize(image_data):
    # Cast to float first: subtracting 128 directly from uint8 data would wrap around
    image_data = image_data.astype(np.float32)
    normalizedOutput = (image_data - 128) / 128
    return normalizedOutput

normalized_image = normalize_grayscale(image)
plt.imshow(normalized_image)
plt.show()
print(normalized_image.shape)
In [5]:
def flipImage(image):
    flipped_image = np.fliplr(image)
    return flipped_image

flipped_image = flipImage(image)
plt.imshow(flipped_image)
plt.show()
print(flipped_image.shape)
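Note that a horizontal flip is not label-preserving for many sign classes (e.g. "Keep right" becomes "Keep left"), so it should only be applied to symmetric signs. A small random translation is a safer augmentation; a minimal sketch (the max_shift value and the wrap-around behavior of np.roll are illustrative choices, not part of the project spec):
def randomTranslate(image, max_shift=2):
    # Shift the image by up to max_shift pixels in each direction.
    # np.roll wraps pixels around the edges, which is tolerable for small shifts.
    dx, dy = np.random.randint(-max_shift, max_shift + 1, size=2)
    return np.roll(np.roll(image, dy, axis=0), dx, axis=1)

translated_image = randomTranslate(image)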
In [6]:
# Shuffling the data
from sklearn.utils import shuffle
X_train, y_train = shuffle(X_train, y_train)
In [7]:
# Normalize training data
X_train = normalize_grayscale(X_train)
X_test = normalize_grayscale(X_test)
X_valid = normalize_grayscale(X_valid)
#X_train = flipImage(X_train)
#X_test = flipImage(X_test)
#X_valid = flipImage(X_valid)
getDatasetInfo()
In [8]:
import tensorflow as tf

EPOCHS = 100
BATCH_SIZE = 150
KEEP_PROB = 0.9  # dropout keep probability used during training
# keep_prob is a placeholder so dropout can be disabled (fed 1.0) during evaluation
keep_prob = tf.placeholder(tf.float32)
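As a quick check of the dropout scaling (a throwaway sketch): tf.nn.dropout uses inverted dropout, scaling kept units by 1/keep_prob, so no rescaling is needed at evaluation time.
drop = tf.nn.dropout(tf.ones([4]), keep_prob)
with tf.Session() as sess:
    # Kept entries become 1/0.5 = 2.0, dropped entries become 0.0
    print(sess.run(drop, feed_dict={keep_prob: 0.5}))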
In [9]:
from tensorflow.contrib.layers import flatten

def LeNet(x):
    # Arguments used for tf.truncated_normal; randomly initializes the weights and biases for each layer
    mu = 0
    sigma = 0.1

    # Layer 1: Convolutional. Input = 32x32x3. Output = 28x28x6.
    conv1_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 3, 6), mean=mu, stddev=sigma))
    conv1_b = tf.Variable(tf.zeros(6))
    conv1 = tf.nn.conv2d(x, conv1_W, strides=[1, 1, 1, 1], padding='VALID') + conv1_b
    print("ConvNet 1: ", conv1)
    # Activation.
    conv1 = tf.nn.relu(conv1)
    # Pooling. Input = 28x28x6. Output = 14x14x6.
    conv1 = tf.nn.max_pool(conv1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    conv1 = tf.nn.dropout(conv1, keep_prob)

    # Layer 2: Convolutional. Output = 10x10x16.
    conv2_W = tf.Variable(tf.truncated_normal(shape=(5, 5, 6, 16), mean=mu, stddev=sigma))
    conv2_b = tf.Variable(tf.zeros(16))
    conv2 = tf.nn.conv2d(conv1, conv2_W, strides=[1, 1, 1, 1], padding='VALID') + conv2_b
    print("ConvNet 2: ", conv2)
    # Activation.
    conv2 = tf.nn.relu(conv2)
    # Pooling. Input = 10x10x16. Output = 5x5x16.
    conv2 = tf.nn.max_pool(conv2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='VALID')
    conv2 = tf.nn.dropout(conv2, keep_prob)

    # Flatten. Input = 5x5x16. Output = 400.
    fc0 = flatten(conv2)
    print("Fully Connected 0: ", fc0)

    # Layer 3: Fully Connected. Input = 400. Output = 120.
    fc1_W = tf.Variable(tf.truncated_normal(shape=(400, 120), mean=mu, stddev=sigma))
    fc1_b = tf.Variable(tf.zeros(120))
    fc1 = tf.matmul(fc0, fc1_W) + fc1_b
    # Activation.
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, keep_prob)  # keep_prob is fed 1.0 during evaluation
    print("Fully Connected 1: ", fc1)

    # Layer 4: Fully Connected. Input = 120. Output = 84.
    fc2_W = tf.Variable(tf.truncated_normal(shape=(120, 84), mean=mu, stddev=sigma))
    fc2_b = tf.Variable(tf.zeros(84))
    fc2 = tf.matmul(fc1, fc2_W) + fc2_b
    # Activation.
    fc2 = tf.nn.relu(fc2)
    fc2 = tf.nn.dropout(fc2, keep_prob)
    print("Fully Connected 2: ", fc2)

    # Layer 5: Fully Connected. Input = 84. Output = 43.
    fc3_W = tf.Variable(tf.truncated_normal(shape=(84, 43), mean=mu, stddev=sigma))
    fc3_b = tf.Variable(tf.zeros(43))
    logits = tf.matmul(fc2, fc3_W) + fc3_b
    print("Logits: ", logits)

    return logits
x is a placeholder for a batch of input images. y is a placeholder for a batch of output labels.
In [10]:
x = tf.placeholder(tf.float32, (None, 32, 32, 3))
y = tf.placeholder(tf.int32, (None))
one_hot_y = tf.one_hot(y, 43)
A validation set can be used to assess how well the model is performing. Low accuracy on both the training and validation sets implies underfitting; high accuracy on the training set but low accuracy on the validation set implies overfitting.
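One common refinement (not used below) is to checkpoint only when the validation accuracy improves, so the saved model is the best one seen rather than the last one. A minimal sketch of the pattern, assuming the saver, evaluate, and training loop defined in the following cells:
best_validation_accuracy = 0.0
for i in range(EPOCHS):
    # ... run the training batches for this epoch ...
    validation_accuracy = evaluate(X_valid, y_valid)
    if validation_accuracy > best_validation_accuracy:
        best_validation_accuracy = validation_accuracy
        saver.save(sess, training_save_file)  # keep the best checkpoint so far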
In [11]:
### Train your model here.
### Calculate and report the accuracy on the training and validation set.
### Once a final model architecture is selected,
### the accuracy on the test set should be calculated and reported as well.
In [12]:
rate_of_learning = 0.001
logits = LeNet(x)
cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=one_hot_y, logits=logits)
loss_operation = tf.reduce_mean(cross_entropy)
optimizer = tf.train.AdamOptimizer(learning_rate = rate_of_learning)
training_operation = optimizer.minimize(loss_operation)
In [13]:
correct_prediction = tf.equal(tf.argmax(logits, 1), tf.argmax(one_hot_y, 1))
accuracy_operation = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
saver = tf.train.Saver()

def evaluate(X_data, y_data):
    num_examples = len(X_data)
    total_accuracy = 0
    sess = tf.get_default_session()
    for offset in range(0, num_examples, BATCH_SIZE):
        batch_x, batch_y = X_data[offset:offset+BATCH_SIZE], y_data[offset:offset+BATCH_SIZE]
        # Disable dropout (keep_prob = 1.0) when measuring accuracy
        accuracy = sess.run(accuracy_operation, feed_dict={x: batch_x, y: batch_y, keep_prob: 1.0})
        total_accuracy += (accuracy * len(batch_x))
    return total_accuracy / num_examples
In [14]:
training_save_file = './lenet.ckpt'

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    num_examples = len(X_train)
    print("Training...")
    print()
    for i in range(EPOCHS):
        X_train, y_train = shuffle(X_train, y_train)
        for offset in range(0, num_examples, BATCH_SIZE):
            end = offset + BATCH_SIZE
            batch_x, batch_y = X_train[offset:end], y_train[offset:end]
            sess.run(training_operation, feed_dict={x: batch_x, y: batch_y, keep_prob: KEEP_PROB})
        #training_accuracy = evaluate(X_train, y_train)
        #test_accuracy = evaluate(X_test, y_test)
        validation_accuracy = evaluate(X_valid, y_valid)
        print("EPOCH {} ...".format(i+1))
        #print("Training Accuracy = {:.3f}".format(training_accuracy))
        #print("Test Accuracy = {:.3f}".format(test_accuracy))
        print("Validation Accuracy = {:.3f}".format(validation_accuracy))
        print()
    saver.save(sess, training_save_file)
    print("Model saved")
In [15]:
with tf.Session() as sess:
    saver.restore(sess, tf.train.latest_checkpoint('.'))
    training_accuracy = evaluate(X_train, y_train)
    test_accuracy = evaluate(X_test, y_test)
    validation_accuracy = evaluate(X_valid, y_valid)
    print("Training Accuracy = {:.3f}".format(training_accuracy))
    print("Test Accuracy = {:.3f}".format(test_accuracy))
    print("Validation Accuracy = {:.3f}".format(validation_accuracy))
To give yourself more insight into how your model is working, download at least five pictures of German traffic signs from the web and use your model to predict the traffic sign type.
You may find signnames.csv useful as it contains mappings from the class id (integer) to the actual sign name.
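For example, a minimal sketch that builds an id -> name dict from signnames.csv with the standard csv module (the cells below use np.genfromtxt instead):
import csv

with open("signnames.csv") as f:
    reader = csv.reader(f)
    next(reader)  # skip the header row
    id_to_name = {int(row[0]): row[1] for row in reader}
print(id_to_name[14])  # e.g. "Stop"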
In [51]:
import cv2
import os
import matplotlib.image as mpimage

directory = "german-traffic-signs/"
# Retrieve only files with valid image extensions
image_extensions = ['jpg', 'bmp', 'png', 'gif']
new_images = [imgname for imgname in os.listdir(directory)
              if any(imgname.endswith(ext) for ext in image_extensions)]

# Show the images and add them to a list to process for classifying
new_images_data = []
for image_name in new_images:
    imagePath = directory + image_name
    # Note: mpimage.imread returns floats in [0, 1] for PNG files and uint8 for JPG
    image = mpimage.imread(imagePath)
    print(image.shape)
    # Remove the alpha channel so the image only has 3 RGB channels
    # (grayscale images have len(image.shape) == 2)
    if len(image.shape) > 2 and image.shape[2] == 4:
        # Convert the image from RGBA to RGB (matplotlib loads images as RGB/RGBA)
        image = cv2.cvtColor(image, cv2.COLOR_RGBA2RGB)
    new_images_data.append(image)
    plt.imshow(image)
    plt.colorbar()
    plt.show()

# The new images must already match the LeNet training image shape (32 x 32 x 3)
new_images_data = np.array(new_images_data)
print(new_images_data.shape)
In [52]:
signNames = np.genfromtxt("signnames.csv", dtype=None, delimiter=",", names=True)
#signName = [name[1].decode('utf-8') for name in signNames]
for i in range(len(signNames)):
    print(signNames[i])
In [53]:
# Preprocess new_images by normalizing
new_images_data = normalize_grayscale(new_images_data)
print(new_images_data.shape)
In [55]:
# Testing the model on the new images
with tf.Session() as sess:
    saver.restore(sess, training_save_file)
    # keep_prob is fed 1.0 so dropout is disabled at inference time
    new_images_classification = sess.run(logits, feed_dict={x: new_images_data, keep_prob: 1.0})
    print(new_images_classification)
In [57]:
## Applying softmax to the logits
with tf.Session() as sess:
    softmax_probs = sess.run(tf.nn.softmax(new_images_classification))
    print(softmax_probs)
In [59]:
### Print out the top five softmax probabilities for the predictions on the German traffic sign images found on the web.
## Finding top_k on the softmax outputs
with tf.Session() as sess:
    #predictions = sess.run(tf.nn.top_k(tf.constant(new_images_classification), k=5))
    predictions = sess.run(tf.nn.top_k(tf.constant(softmax_probs), k=5))

for i in range(len(predictions[0])):
    print('Image', i, 'top-5 probabilities:', predictions[0][i], 'for class ids:', predictions[1][i])
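To make the report more readable, the class ids can be mapped to sign names; a minimal sketch, assuming the id_to_name dict built in the earlier signnames.csv sketch:
top_probs, top_ids = predictions
for i in range(len(top_probs)):
    # Pair each of the five candidate sign names with its softmax probability
    names = [id_to_name[class_id] for class_id in top_ids[i]]
    print('Image', i, 'top-5:', list(zip(names, top_probs[i])))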